clear all
set maxvar 120000
set seed 123

* NOTE(review): cblind1 is not one of Stata's built-in schemes; presumably it comes
* from a user-written scheme package that must be installed first - confirm.
* The perm option makes this the default scheme for future Stata sessions as well.
set scheme cblind1, perm

** This version::: May 2024

** Set Dir
global master_dir "/Users/maxwang/Library/CloudStorage/Dropbox/Dissertation/ISS/"

* Quote the path so that cd still works when the project root contains spaces.
cd "$master_dir"

* Folders this script writes to. capture keeps the calls harmless on reruns,
* when the folders already exist (a bare mkdir would stop the do-file).
capture mkdir "data"
capture mkdir "data/temp"
capture mkdir "temp"
capture mkdir "CLEO/visuals"

/* NOTE: 
	1. Our data are based on Expeditions 1-68 (Nov 2000 to Sep 2022); the experiments took place within the USOS and were sponsored by NASA.
	2. When running this yourself, start from the section headed "1. Plots" below.
*/


**  Prep: Create experiment-expedition Xwalk
* Output: one row per (experiment, expedition) pair with the expedition date and
* year attached, saved as CLEO/data/xwalk_exptoexp.dta.
use "CLEO/data/all_experiments_developer_pi.dta", clear
keep shortname expeditions
duplicates drop

* Turn "/" separators into ", " so a single split handles both forms.
replace expeditions = subinstr(expeditions, "/", ", ", .)
rename (shortname expeditions) (experimentname expedition)
split expedition, parse(", ")
drop expedition
duplicates drop

* Wide to long: one row per listed expedition; padded empty slots are dropped.
reshape long expedition, i(experimentname) j()
drop _j
drop if missing(expedition)

* Attach expedition date and year; keep only pairs found in the expedition list.
merge m:1 expedition using "CLEO/data/all_expeditions_year.dta"
keep if _merge == 3
drop _merge
codebook experimentname
	// 1,340 experiments in total

* Flight sequence within experiment (ordered by expedition date) and first flight year.
bysort experimentname (expedition_date): generate seq_experiment = _n
destring expedition_year, replace
bysort experimentname: egen first_expyear = min(expedition_year)

label variable seq_experiment "Mission sequence based on expedition order"
label variable first_expyear "By experiment: first expedition year"
label variable expedition "Expedition number"
label variable expedition_date "DDMMYYYY of expedition"
label variable expedition_year "Year of expedition"
duplicates drop
compress
save "CLEO/data/xwalk_exptoexp.dta", replace


**  1. Plots
* Build the experiment x expedition panel (data/temp/expexp.dta) used by the
* first two figures. i_private = 1 when any row of the experiment names a
* company and has country == "UNITED STATES".
use "CLEO/data/all_experiments_developer_pi.dta", clear
generate i_p = !missing(companyname) & country == "UNITED STATES"
rename shortname experimentname
bysort experimentname: egen i_private = max(i_p)
keep expeditions experimentname fullname i_private
duplicates drop

* Normalise the "/" separator, then split the expedition list into one row each.
replace expeditions = subinstr(expeditions, "/", ", ", .)
split expeditions, parse(", ")
drop expeditions
reshape long expeditions, i(experimentname fullname i_private) j()
drop _j
drop if missing(expeditions)
rename expeditions expedition

merge m:1 expedition using "CLEO/data/all_expeditions_year.dta"
keep if _merge == 3
drop _merge

* expedition is still a string at this point, so sorting on it would place
* "10" before "2". Order flights by expedition_date instead, which is what the
* crosswalk prep section does for the same sequence variable.
bysort experimentname (expedition_date): generate seq_experiment = _n
label variable seq_experiment "sequence of sub-experiment based on expedition order"
duplicates drop
save "data/temp/expexp.dta", replace

**  how many experiment projects over year (Main figure 2)
use "data/temp/expexp.dta", clear
codebook experimentname
	// 1,339 experiments 2001-2022
codebook experimentname if i_private == 1
	// 622 have US private companies as developers and\or PIs
destring expedition_year, replace

* One row per experiment, dated at its earliest expedition year.
* uno is the public complement of i_private, so the stacked bars count each group.
collapse (mean) i_private (min) expedition_year, by(experimentname)
generate uno = (i_private == 0)

graph bar (sum) i_private uno, over(expedition_year, label(labsize(*1.8))) ///
	legend(order(1 "Private" 2 "Public") position(12) rows(1) size(*3.3) ring(0)) stack ///
	xsize(20) ysize(6) ///
	ytitle("Number of experiments", size(*2)) ///
	ylabel(0(30)180, labsize(*2))
graph export "CLEO/visuals/expbyyear.png", replace

**  how many expeditions over year, by share of Private developer or PI (Extended figure 1)
use "data/temp/expexp.dta", clear
destring expedition, replace

* One row per experiment-expedition pair; stacked bars count private and public
* experiments flown on each expedition.
collapse (mean) i_private, by(experimentname expedition)
generate uno = (i_private == 0)
	// 68 expeditions

graph bar (sum) i_private uno, over(expedition) ///
	legend(off) stack ///
	xsize(20) ysize(7) ///
	ytitle("Number of experiments", size(*1.7)) ///
	ylabel(0(30)180, labsize(*1.4))
graph export "CLEO/visuals/missionbyyear.png", replace

**  new vs old companies (Main figure 3)
* uno = companies whose first experiment starts in the year ("new");
* dos = companies with any later-ranked experiment starting in the year ("returning").
use "CLEO/data/all_experiments_developer_pi.dta", clear
keep if country == "UNITED STATES"
rename shortname experimentname
joinby experimentname using "CLEO/data/xwalk_exptoexp.dta"
keep companyname experimentname first_expyear
duplicates drop
drop if missing(companyname)
codebook companyname
	// 183 companies

* Rank each company's experiments by first flight year (stable keeps tie order).
sort companyname first_expyear, stable
by companyname: generate visit_no = _n

* New companies per year: the first-ranked experiment of every company.
preserve
keep if visit_no == 1
keep companyname first_expyear
duplicates drop
generate uno = 1
collapse (sum) uno, by(first_expyear)
save "temp/comp1.dta", replace
restore

* Returning companies per year: every later-ranked experiment, one count per company-year.
* NOTE(review): a company with two experiments in its debut year is counted as
* both new and returning in that year - confirm this is intended.
keep if visit_no > 1
keep companyname first_expyear
duplicates drop
generate dos = 1
collapse (sum) dos, by(first_expyear)

merge 1:1 first_expyear using  "temp/comp1.dta"
* NOTE(review): only uno is zero-filled; dos stays missing in years that have
* new companies but no returning ones.
replace uno = 0 if missing(uno)

graph bar uno dos, over(first_expyear, label(labsize(*1.8))) ///
	xsize(20) ysize(6) ///
	legend(order(1 "New companies" 2 "Returning companies") position(12) rows(1) ring(0) size(*3)) ///
	ytitle("Number of companies", size(*2))  ///
	ylabel(0(5)30, labsize(*2))
graph export "CLEO/visuals/companybyyear.png", replace

**  geographic distribution of developers (Main figure 4 panel A)
* Counts distinct US companies per state and plots them in descending order.
use "CLEO/data/all_experiments_developer_pi.dta", clear
keep if country == "UNITED STATES"
rename shortname experimentname
drop if missing(companyname) | missing(state)

* Normalise case BEFORE de-duplicating and counting. Doing it afterwards lets
* case variants of one state survive duplicates drop as separate company-state
* pairs, which inflates the bar and gives the variants different sort keys.
replace state = upper(state)

keep companyname state
duplicates drop
generate uno = 1
bysort state: egen ntup = sum(uno)
* Negated count, used so that sort() puts the largest state first.
generate negtup = -ntup

graph hbar (sum) uno, over(state, sort(negtup) label(labsize(*1.7))) ///
	ylabel(0(10)50, labsize(*2)) ytitle("") ///
	xsize(9) ysize(20)
graph export "CLEO/visuals/companybystate.png", replace

**  frequent flyers (Main figure 4 panel B)
* Number of experiments per US company, split into developer and PI roles,
* for companies with more than two experiments.

* The experiment names are upper-cased below, but the saved crosswalk keeps the
* original case. Join against an upper-cased copy so that mixed-case names are
* not silently dropped by joinby (which discards unmatched rows by default).
use "CLEO/data/xwalk_exptoexp.dta", clear
replace experimentname = upper(experimentname)
tempfile xwalk_fig4b
save `xwalk_fig4b'

use "CLEO/data/all_experiments_developer_pi.dta", clear
keep if country == "UNITED STATES"
replace shortname = upper(shortname)
* Upper-case company names before any de-duplication or counting, so case
* variants of one company are pooled rather than counted separately.
replace companyname = upper(companyname)
rename (shortname expeditions) (experimentname expedition)
keep experimentname companyname i_*
duplicates drop

joinby experimentname using `xwalk_fig4b'
keep experimentname expedition_year companyname i_*
duplicates drop
keep if !missing(companyname)

* One row per experiment-company pair, dated at the first expedition year.
collapse (min) expedition_year (max) i_*, by(experimentname companyname)
sort companyname expedition_year
* A pair flagged as PI is counted as PI only, so the stacked bars do not double count.
replace i_developer = 0 if i_pi == 1

generate uno = 1
bysort companyname: egen n_tup = sum(uno)
* Shorten one long name so the axis label fits.
replace companyname = "SPACE TECH AND ADVANCED RES SYS" if companyname == "SPACE TECHNOLOGY AND ADVANCED RESEARCH SYSTEMS"

graph hbar (sum) i_developer i_pi if n_tup>2, stack over(companyname, sort(n_tup) label(labsize(*1.2))) ///
	xsize(15) ysize(20) ///
	ytitle("") ///
	ylabel(0(30)95, labsize(*1.5)) ///
	legend(order(1 "Developer" 2 "PI") position(12) rows(1) size(*1.6))
graph export "CLEO/visuals/companybyexp.png", replace


**  2. Development of ISSNL ecosystem

**  2.1. Papers trend
* Counts experiment-linked publications per publication year, split by whether
* the experiment has a US company attached (i_private).
use "CLEO/data/all_experiments_developer_pi.dta", clear
keep if country == "UNITED STATES"
rename shortname short
replace short = upper(short)
generate i_private = !missing(companyname)
collapse (max) i_private, by(short)

merge 1:1 short using "CLEO/data/expyear.dta"
keep if _merge == 3
drop _merge
label variable expedition_year "Year first expedition"
label variable expedition_date "Dat first expedition"

merge 1:m short using "CLEO/data/Final2023.dta", keepusing(pmid doi year)
keep if _merge != 2
drop _merge

* Publications from 2001-2021 that appear in or after the first expedition year.
* NOTE(review): this section uses >= while the later sections of this file use
* a strict > against expedition_year - confirm the difference is intended.
keep if inrange(year, 2001, 2021) & year >= expedition_year
generate year_diff = year - expedition_year
summarize year_diff
	// 4.6 years
codebook pmid doi
	// 384 biomed and 45 physcial sciences papers
codebook pmid doi if i_private == 1
	// 130 biomed and 38 physcial sciences papers with private companies

* Tag one row per experiment-paper pair, then count by year and sector.
egen uno = tag(short pmid)
egen dos = tag(short doi)
collapse (sum) n_pmid=uno n_doi=dos, by(year i_private)
reshape wide n_pmid n_doi, i(year) j(i_private)

* (Main figure 5)
graph bar n_pmid1 n_pmid0, over(year, label(labsize(*2))) stack ///
	xsize(20) ysize(6) ///
	legend(order(1 "Private" 2 "Public") position(12) rows(1) ring(0) size(*3.2)) ///
	ytitle("Number of paper publications", size(*1.8)) ///
	b1title("Year of publication", size(*1.5)) ///
	ylabel(0(10)60, labsize(*2))
graph export "CLEO/visuals/pubpmidbyyear.png", replace

* (Extended figure 2)
graph bar n_doi1 n_doi0, over(year, label(labsize(*1.1))) stack ///
	xsize(20) ysize(9) ///
	legend(order(1 "Private" 2 "Public") position(12) rows(1) size(*2)) ///
	ytitle("Number of physical sciences publications", size(*1.5)) ///
	b1title("Year of publication") ///
	ylabel(0(2)13)
graph export "CLEO/visuals/pubapsbyyear.png", replace

**  2.2. Patents trend
* Panel A: patents linked directly to experiments, counted by grant year and sector.
use "CLEO/data/all_experiments_developer_pi.dta", clear
keep if country == "UNITED STATES"
rename shortname short
replace short = upper(short)
generate i_private = !missing(companyname)
collapse (max) i_private, by(short)

merge 1:1 short using "CLEO/data/expyear.dta"
keep if _merge == 3
drop _merge
label variable expedition_year "Year first expedition"
label variable expedition_date "Dat first expedition"

merge 1:m short using "CLEO/data/Final2023.dta", keepusing(patent*)
keep if _merge != 2
drop _merge
merge m:1 patent_id using "CLEO/data/patent_date.dta"
keep if _merge != 2
drop _merge

* Keep patents granted on or after the first expedition date, within 2001-2021.
* Rows without a grant date have a missing grant_year and fall out at the inrange filter.
generate delay = grant_date - expedition_date
drop if delay < 0
generate grant_year = year(grant_date)
keep if inrange(grant_year, 2001, 2021)
codebook patent_id if i_private == 1
codebook patent_id if i_private == 0
	// 38 public, 3 private patents
summarize delay if delay > 0, detail
	// average delay is 2.67 years

* Side output: patent-level file for later analysis; direct patents are flagged 1
* and the rows appended from temp/p1.dta are flagged 0.
preserve
drop if missing(patent_id)
keep short i_private n_expedition expedition_year expedition_date patent_id grant_date
duplicates drop
generate i_directpatent = 1
append using "temp/p1.dta"
replace i_directpatent = 0 if missing(i_directpatent)
label variable i_directpatent "=1 if directly from ISS experiments"
save "temp/iss_patent_analysis.dta", replace
restore

keep patent_id grant_year i_private
* Count each patent once per sector and grant year. Without this, a patent
* appearing on several source rows is summed once per row; the paper-patent
* panel later in this file de-duplicates in the same way before counting.
duplicates drop
generate dos = i_private == 0
generate uno = i_private
collapse (sum) uno dos, by(grant_year)

* Zero-fill grant years with no patents so the axis always spans 2001-2021:
* go wide, create the missing year columns, then return to long.
generate iid = 1
reshape wide uno dos, i(iid) j(grant_year)
forvalues y = 2001/2021 {
	capture confirm variable uno`y'
	if _rc == 111 {
		generate uno`y' = 0
		generate dos`y' = 0
	}
}
reshape long uno dos, i(iid) j(_j)
rename _j grant_year

* (Main figure 6 panel A)
graph bar uno dos, over(grant_year, label(labsize(*1.6))) b1title("Patent grant year", size(*1.3)) stack ///
	legend(order(1 "Private" 2 "Public") position(9) rows(2) ring(0) size(*3)) ///
	ytitle("Number of patents", size(*1.8)) xsize(20) ysize(7) ///
	ylabel(0(4)20, labsize(*1.8)) ///
	subtitle("Panel A. Patents directly reported to NASA", size(*1.5))
graph save "temp/patdirect.gph", replace

* Panel B: patents that cite experiment-linked papers (paper-patent pairs),
* counted by grant year and sector, then combined with Panel A.
use "CLEO/data/all_experiments_developer_pi.dta", clear
keep if country == "UNITED STATES"
rename shortname short
replace short = upper(short)
generate i_private = !missing(companyname)
collapse (max) i_private, by(short)

merge 1:1 short using "CLEO/data/expyear.dta"
keep if _merge == 3
drop _merge
label variable expedition_year "Year first expedition"
label variable expedition_date "Dat first expedition"

* Papers published strictly after the first expedition year.
merge 1:m short using "CLEO/data/Final2023.dta", keepusing(pmid date year)
keep if year > expedition_year & _merge != 2
drop _merge
keep if !missing(pmid)
collapse (max) i_private, by(pmid date)
destring pmid, replace

* Link each paper to the patents that cite it.
merge 1:m pmid using "CLEO/data/pmid_patent_pair_ros.dta"
keep if _merge == 3
drop _merge
codebook pmid if !missing(patent_id)
	// 21 papers cited by patents
codebook patent_id
	// by 57 patents, so in total 57+41=98 ISS patents

* One row per patent and sector, dated by grant year.
keep patent_id i_private
duplicates drop
merge m:1 patent_id using "CLEO/data/patent_date.dta"
keep if _merge == 3
drop _merge
* NOTE(review): unlike Panel A, no inrange(grant_year, 2001, 2021) filter is
* applied here, so grants outside that window would add bars - confirm.
generate year = year(grant_date)
generate uno = i_private
generate dos = i_private == 0
collapse (sum) uno dos, by(year)

* Zero-fill years 2001-2021 that have no patents (wide, add columns, back to long).
generate iid = 1
reshape wide uno dos, i(iid) j(year)
forvalues y = 2001/2021 {
	capture confirm variable uno`y'
	if c(rc) == 111 {
		generate uno`y' = 0
		generate dos`y' = 0
	}
}
reshape long uno dos, i(iid) j(_j)
rename _j grant_year

* (Main figure 6 panel B)
graph bar uno dos, over(grant_year, label(labsize(*1.6))) b1title("Patent grant year", size(*1.3)) stack ///
	legend(order(1 "Private" 2 "Public") position(9) rows(2) ring(0) size(*3)) ///
	ytitle("Number of patents", size(*1.5)) xsize(20) ysize(7) ///
	ylabel(0(4)20, labsize(*1.8)) ///
	subtitle("Panel B. Patents derived from paper-patent pairs", size(*1.5))
graph save "temp/patindirect.gph", replace

* Stack the two panels with a single shared legend taken from Panel A.
grc1leg2 "temp/patdirect.gph" "temp/patindirect.gph", ///
	legendfrom("temp/patdirect.gph") ///
	xsize(20) ysize(9) col(1) pos(9) ycomm
graph export "CLEO/visuals/patentdirect.png", replace


**  3. Regressions:: paper and author analysis using citation

** 3.0. Identify all ISS pubs of interest
* Output temp/t1.dta: one row per ISS paper (pmid) with publication year,
* first flight year, number of flights and the private indicator.

* Experiment names are upper-cased below, but the saved crosswalk keeps the
* original case. Join against an upper-cased copy; otherwise mixed-case names
* stay unmatched and their papers end up with a missing first_expyear and
* zero flights.
use "CLEO/data/xwalk_exptoexp.dta", clear
replace experimentname = upper(experimentname)
tempfile xwalk_pubs
save `xwalk_pubs'

use "CLEO/data/all_experiments_developer_pi.dta", clear
keep if country == "UNITED STATES"
rename shortname short
replace short = upper(short)
generate i_private = !missing(companyname)
collapse (max) i_private, by(short)

merge 1:1 short using "CLEO/data/expyear.dta"
keep if _merge == 3
drop _merge

* Papers with a pmid, published strictly after the first expedition year.
merge 1:m short using "CLEO/data/Final2023.dta", keepusing(pmid year date)
keep if _merge == 3
keep if !missing(pmid)
drop _merge
keep if year > expedition_year

rename short experimentname
joinby experimentname using `xwalk_pubs', unmatched(master)
keep experimentname pmid expedition first_expyear year i_private
duplicates drop
destring expedition, replace

* Count distinct expeditions per paper (tag marks one row per expedition-paper pair).
egen n_expedition = tag(expedition pmid)
collapse (mean) year (min) first_expyear (max) i_private (sum) n_expedition, by(pmid)
label variable n_expedition "Number of flights"
label variable first_expyear "First flight year"
label variable year "Publication year"
label variable i_private "Private developer or PI indicator"
save "temp/t1.dta", replace

** 3.1. comp group by author and year
* Output temp/comp_pmid2.dta: every non-ISS paper written by an author who has
* at least one ISS paper, together with its publication year.
use "temp/t1.dta", clear
keep pmid
merge 1:m pmid using "CLEO/data/all_authorlongindexed.dta", keepusing(author_id)

* i_iss marks author-paper rows whose paper is in the ISS list;
* i_focal marks every row of an author with at least one such paper.
generate i_iss = (_merge == 3)
bysort author_id: egen i_focal = max(i_iss)

* Keep the focal authors' other (non-ISS) papers only.
keep if i_focal == 1 & i_iss != 1
keep author_id pmid
duplicates drop

merge m:1 pmid using "CLEO/data/pubyear.dta", keepusing(year)
keep if _merge == 3
drop _merge
rename (pmid year) (comp_pmid comp_year)
save "temp/comp_pmid2.dta", replace

* Output temp/comp_pmid3.dta: one row per (author, comparison paper) with the
* smallest value of (ISS publication year - comparison publication year)
* across that author's ISS papers.
use "temp/t1.dta", clear
keep pmid year
merge 1:m pmid using "CLEO/data/all_authorlongindexed.dta", keepusing(author_id)
keep if _merge == 3
drop _merge
duplicates drop

* Pair every ISS paper of an author with every comparison paper of that author.
joinby author_id using "temp/comp_pmid2.dta"
drop if pmid == comp_pmid

* Compute the gap directly on the long pairs. The earlier wide/long round trip
* looped over a fixed 75 comparison slots: it stopped with an error when an
* author-paper had fewer than 75 pairs at most, and left year_diff missing for
* any slot beyond 75. The direct form gives the same values for any number of pairs.
generate year_diff = year - comp_year

* NOTE(review): min() is taken over the signed gap, not the absolute gap, so
* for authors with several ISS papers this is the most negative difference
* rather than the distance to the closest ISS paper - confirm intended.
keep author_id comp_pmid comp_year year_diff
collapse (min) year_diff, by(author_id comp_pmid comp_year)
rename comp_* *
label variable year_diff "Difference in year between comparison paper and ISS paper"
save "temp/comp_pmid3.dta", replace

* Assemble the author-paper regression sample (data/temp/working_publication2.dta):
* ISS papers plus the same authors' comparison papers, with citation counts,
* author counts, journal, and subject-breadth measures.
use "temp/t1.dta", clear
generate i_iss=1
merge 1:m pmid using "CLEO/data/all_authorlongindexed.dta", keepusing(author_id)
keep if _merge == 3
drop _merge

* Add comparison papers; they arrive without i_iss and are flagged 0.
append using "temp/comp_pmid3.dta"
replace i_iss = 0 if missing(i_iss)

* Citation counts; papers absent from the citation file get zero.
merge m:1 pmid using "CLEO/data/paper_paper_citation.dta", keepusing(n_citation n4_citation)
keep if _merge != 2
drop _merge
replace n_citation = 0 if missing(n_citation)
replace n4_citation = 0 if missing(n4_citation)
generate citation_rate_y = n_citation / (2023 - year)

* Patent citations per paper. The pair file is joined on a numeric pmid, so
* pmid is converted for the join and back to string afterwards.
destring pmid, replace
joinby pmid using "CLEO/data/pmid_patent_pair_ros.dta", unmatched(master)
drop _merge
egen uno = tag(pmid patent_id)
bysort pmid: egen n_patci = sum(uno)
drop uno patent_id
duplicates drop
replace n_patci = 0 if missing(n_patci)
generate patent_rate_y = n_patci / (2023 - year)
tostring pmid, replace

* Author counts (unmatched papers kept) and journal (matched papers only).
merge m:1 pmid using "CLEO/data/all_authorcountwide.dta"
drop if _merge == 2
drop _merge
merge m:1 pmid using "CLEO/data/all_journalinfo.dta", keepusing(journal)
keep if _merge == 3
drop _merge

* Side computation: subject breadth and subject-combination frequency per paper.
preserve
keep pmid
duplicates drop
merge 1:1 pmid using "CLEO/data/pmidroot.dta"
keep if _merge == 3
drop _merge
reshape long root, i(pmid) j()
drop if root == 0
drop root
* Group the 111 root columns into 15 categories by column position.
generate root = 1 if inrange(_j, 1, 21)
replace root = 2 if inrange(_j, 22, 26)
replace root = 3 if inrange(_j, 27, 49)
replace root = 4 if inrange(_j, 50, 65)
replace root = 5 if inrange(_j, 66, 72)
replace root = 6 if inrange(_j, 73, 76)
replace root = 7 if inrange(_j, 77, 93)
replace root = 8 if inrange(_j, 94, 95)
replace root = 9 if inrange(_j, 96, 98)
replace root = 10 if inrange(_j, 99, 101)
replace root = 11 if inrange(_j, 102, 102)
replace root = 12 if inrange(_j, 103, 103)
replace root = 13 if inrange(_j, 104, 104)
replace root = 14 if inrange(_j, 105, 110)
replace root = 15 if inrange(_j, 111, 111)
drop _j
duplicates drop
* breath = number of distinct categories attached to the paper.
egen uno = tag(pmid root)
bysort pmid: egen breath = sum(uno)
drop uno
* Build a 0/1 pattern string over the categories; after this step root
* identifies the paper's category combination.
generate iid = 1
reshape wide iid, i(pmid) j(root)
foreach flag of varlist iid* {
	replace `flag' = 0 if missing(`flag')
}
egen root = concat(iid*)
merge 1:1 pmid using "CLEO/data/pubyear.dta", keepusing(year)
keep if _merge == 3
sort year
* subject_freq = number of papers sharing the same category combination.
generate uno = 1
bysort root: egen subject_freq = sum(uno)
drop _merge iid* uno
save "data/temp/pmidroot2.dta", replace
restore

* ISS papers carry no year_diff from the append; set their gap to zero.
replace year_diff=0 if missing(year_diff)
joinby pmid using "data/temp/pmidroot2.dta", unmatched(master)
drop _merge
duplicates drop

* Sample flags: an author is in the public (private) sample when at least one
* of the author's ISS papers is public (private).
generate i_public = (i_private == 0 & i_iss == 1)
bysort author_id: egen i_publicsample = max(i_public)
bysort author_id: egen i_privatesample = max(i_private)
replace i_public = 0 if missing(i_public) & !missing(i_publicsample)
replace i_private = 0 if missing(i_private) & !missing(i_privatesample)

* Paper-level minimum gap, then one row per author-paper.
bysort pmid: egen gap_min = min(year_diff)
collapse (mean) year first_expyear i_private n_expedition i_iss n_citation citation_rate_y ///
	n_author breath subject_freq i_public i_publicsample i_privatesample ///
	(min) gap_min (firstnm) journal, by(author_id pmid)
rename gap_min year_diff

label variable n_expedition "Number of flights"
label variable first_expyear "First flight year"
label variable year "Publication year"
label variable breath "Number of unique MeSH prefixes"
label variable subject_freq "Popularity of MeSH combination"
label variable i_private "Private developer or PI indicator"
label variable i_iss "ISS paper indicator"
label variable n_author "Number of coauthors"
keep if inrange(year, 1992, 2021)
codebook pmid author_id if i_iss == 1
	// 212 ISS pubs by 694 authors
codebook pmid if i_iss == 0
	// 1,228 pubs by the same set of authors on earth
save "data/temp/working_publication2.dta", replace

* Produce tables::
* The same six models are estimated on three samples, each restricted to
* comparison papers within +/-5 years:
*   ISS      all authors in the working file             -> a* (paper level), b* (author-paper level)
*   Public   authors with at least one public ISS paper  -> c*, d*
*   Private  authors with at least one private ISS paper -> e*, f*
* Model suffixes: 1 = PPML on n_citation, 2 = OLS on asinh(n_citation),
* 3 = PPML on citation_rate_y. Models 2 and 3 are also written to
* data/temp/paper1.dta for the alternative-specification table.
estimates clear
local rhs       i_iss n_author breath subject_freq
local paperlvl  *citation* pmid i_iss year n_author jid breath subject_freq
local samples   ISS Public Private
local nsamples : word count `samples'

forvalues s = 1/`nsamples' {
	local lbl : word `s' of `samples'
	local pp  : word `s' of a c e
	local ap  : word `s' of b d f

	use "data/temp/working_publication2.dta", clear
	if "`lbl'" == "ISS" {
		isid pmid author_id
	}
	if "`lbl'" == "Public" {
		keep if i_publicsample == 1
	}
	if "`lbl'" == "Private" {
		keep if i_privatesample == 1
	}
	keep if inrange(year_diff, -5, 5)
	encode journal, generate(jid)
	* Inverse hyperbolic sine transform of the outcome variables.
	foreach var of varlist *_y n_citation {
		generate l_`var' = log(`var' + sqrt(`var'^2+1))
	}

	* Paper-level models: drop author identity and de-duplicate to one row per paper.
	preserve
	keep `paperlvl'
	duplicates drop
	* The very first regsave call starts the results file; all later calls append.
	local how = cond(`s' == 1, "replace", "append")
	eststo `pp'2: reghdfe l_n_citation `rhs', absorb(year jid) vce(robust)
	regsave using "data/temp/paper1.dta", `how' ci p level(95) addlabel(subsample, `pp'2, data, `lbl')
	eststo `pp'3: ppmlhdfe citation_rate_y `rhs', absorb(year jid) vce(robust)
	regsave using "data/temp/paper1.dta", append ci p level(95) addlabel(subsample, `pp'3, data, `lbl')
	eststo `pp'1: ppmlhdfe n_citation `rhs', absorb(year jid) vce(robust)
	restore

	* Author-paper-level models: author fixed effects, errors clustered by author.
	eststo `ap'2: reghdfe l_n_citation `rhs', absorb(year author_id jid) cluster(author_id)
	regsave using "data/temp/paper1.dta", append ci p level(95) addlabel(subsample, `ap'2, data, `lbl')
	eststo `ap'3: ppmlhdfe citation_rate_y `rhs', absorb(year author_id jid) cluster(author_id)
	regsave using "data/temp/paper1.dta", append ci p level(95) addlabel(subsample, `ap'3, data, `lbl')
	eststo `ap'1: ppmlhdfe n_citation `rhs', absorb(year author_id jid) cluster(author_id)

	estfe `pp'* `ap'*
	return list

	esttab `pp'1 `pp'2 `pp'3 `ap'1 `ap'2 `ap'3, star(* .10 ** .05 *** .01) varwidth(45) label se ///
		stats(N r2 p, labels("N" "R2" "Joint test p-value") fmt(%20.0f %20.2f %20.2f)) ///
		keep(i_iss n_author breath subject_freq) order(i_iss i_private n_author breath subject_freq)
}

* private vs public
* ISS papers only; the regressor of interest is i_private.
* g* = paper-level models, h* = author-paper-level models.
use "data/temp/working_publication2.dta", clear
keep if i_iss == 1
isid pmid author_id
encode journal, generate(jid)
* Inverse hyperbolic sine transform of the outcome variables.
foreach var of varlist *_y n_citation {
	generate l_`var' = log(`var' + sqrt(`var'^2+1))
}
local rhs i_private n_author breath subject_freq

* NOTE(review): g2 and h2 use l_citation_rate_y as the outcome, whereas the
* corresponding models for the other samples (a2-f2) use l_n_citation.
* Confirm whether this difference is intended.

* Paper-level models on one row per paper.
preserve
keep *citation* pmid i_private year n_author jid breath subject_freq
duplicates drop
eststo g2: reghdfe l_citation_rate_y `rhs', absorb(year jid) vce(robust)
regsave using "data/temp/paper1.dta", append ci p level(95) addlabel(subsample, g2, data, Public-private)
eststo g3: ppmlhdfe citation_rate_y `rhs', absorb(year jid) vce(robust)
regsave using "data/temp/paper1.dta", append ci p level(95) addlabel(subsample, g3, data, Public-private)
eststo g1: ppmlhdfe n_citation `rhs', absorb(year jid) vce(robust)
restore

* Author-paper-level models with author fixed effects.
eststo h2: reghdfe l_citation_rate_y `rhs', absorb(year author_id jid) cluster(author_id)
regsave using "data/temp/paper1.dta", append ci p level(95) addlabel(subsample, h2, data, Public-private)
eststo h3: ppmlhdfe citation_rate_y `rhs', absorb(year author_id jid) cluster(author_id)
regsave using "data/temp/paper1.dta", append ci p level(95) addlabel(subsample, h3, data, Public-private)
eststo h1: ppmlhdfe n_citation `rhs', absorb(year author_id jid) cluster(author_id)

estfe g* h*
return list

* On-screen preview of the main table: PPML count models for all four comparisons.
esttab a1 c1 e1 g1 b1 d1 f1 h1 , star(* .10 ** .05 *** .01) varwidth(45) label se ///
	stats(N r2 p, labels("N" "R2" "Joint test p-value") fmt(%20.0f %20.2f %20.2f)) ///
	keep(i_iss i_private n_author breath subject_freq) order(i_iss i_private n_author breath subject_freq)

* (Extended table 3)
* Writes the eight PPML count models to CLEO/visuals/pubreg1.tex.
* prehead() opens the table float, one \resizebox group and exactly one tabular,
* so postfoot() must close exactly one tabular, then the \resizebox brace, then
* the float. The second \end{tabular} that used to be emitted here had no
* matching \begin and made the generated file fail to compile.
esttab a1 c1 e1 g1 b1 d1 f1 h1  using "CLEO/visuals/pubreg1.tex", replace ///
	label se stats(N r2_p, labels("N" "Pseudo R2") fmt(%20.0f %20.2f %20.2f)) ///
	keep(i_iss i_private n_author breath subject_freq) order(i_iss i_private n_author breath subject_freq) ///
	indicate("Publication year FE = 0.year" "Journal FE = 0.jid" "Author FE = 0.author_id" ) ///
	nocons nomtitle star(* .10 ** .05 *** .01) ///
	style(tex) nogaps noisily b(%15.3gc) ///
	prehead(\begin{table}[htbp]\centering ///
	\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi} ///
	\caption{\centering Estimating the scientific impact of ISS experiments using paper citations \label{reg1}} ///
	\setlength{\tabcolsep}{10pt} ///
	\renewcommand{\arraystretch}{1.1} ///
	\resizebox{\textwidth}{!}{ ///
		\begin{tabular}{l*{9}{c}} ///
		\hline\hline ///
		& \multicolumn{8}{c}{Dependent variable = $\phi$} ///
		\\ \cmidrule(r){2-9} ///
		& \multicolumn{4}{c}{\shortstack{Panel (A)\\Paper level models}} & \multicolumn{4}{c}{\shortstack{Panel (B)\\Author-paper level models}} ///
		\\ \cmidrule(r){2-5} \cmidrule(r){6-9} ///
		) ///
		mgroups("ISS vs. Non-ISS" "Public ISS" "Private ISS" "Public vs. Private" "ISS vs. Non-ISS" "Public ISS" "Private ISS" "Public vs. Private", pattern(1 1 1 1 1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span) ///
		postfoot(\hline\hline ///
		\end{tabular} ///
	} ///
	\end{table} ///
	)	

* alternative specifications (reg2)
* Exports the coefficient of interest (i_iss or i_private) from every saved
* model, listed in reverse order of estimation.
use "data/temp/paper1.dta", clear
keep if var == "i_iss" | var == "i_private"
* Reverse the row order. The index is derived from _N rather than a fixed 17,
* so it stays correct if models are added to or removed from paper1.dta
* (with the current 16 saved models the values are identical).
generate order = _N - _n + 1
sort order
order var ci_lower ci_upper subsample data r2 order
texsave using "CLEO/visuals/pubreg2.tex", replace

* (Main figure 8)
* Re-estimates the PPML count model for comparison windows of +/-1 to +/-10
* years on three samples. Stored estimates are named <sample><level>_<window>:
*   sample: iss (all authors), pub (public sample), pri (private sample)
*   level:  1 = paper level, 2 = author-paper level
estimates clear
local rhs i_iss n_author breath subject_freq
forvalues w = 1/10 {
	foreach grp in iss pub pri {
		use "data/temp/working_publication2.dta", clear
		keep if inrange(year_diff, -`w', `w')
		if "`grp'" == "pub" {
			keep if i_publicsample == 1
		}
		if "`grp'" == "pri" {
			keep if i_privatesample == 1
		}
		encode journal, generate(jid)

		* Paper level: one row per paper, robust standard errors.
		preserve
		keep *citation* pmid i_iss year n_author jid breath subject_freq
		duplicates drop
		eststo `grp'1_`w': ppmlhdfe n_citation `rhs', absorb(year jid) vce(robust)
		restore

		* Author-paper level: author fixed effects, errors clustered by author.
		eststo `grp'2_`w': ppmlhdfe n_citation `rhs', absorb(year author_id jid) cluster(author_id)
	}
}

* Window-sensitivity coefficient plots: the i_iss coefficient from the ten
* window-specific models of one sample and level, drawn side by side.
* Helper arguments:
*   stub     prefix of the stored estimates (the window number 1-10 is appended)
*   symbol   marker symbol
*   color    marker and CI colour
*   bold     windows drawn large and opaque; all others are small at 40% opacity
*   title    panel subtitle
*   outfile  path of the .gph file to save
capture program drop _issplot
program define _issplot
	args stub symbol color bold title outfile
	local plots
	forvalues w = 1/10 {
		local strong : list w in bold
		if `strong' {
			local plots `plots' (`stub'`w', keep(i_iss) msymbol(`symbol') msize(*3.5) mc(`color') ciopts(lwidth(*4.5) lcolor(`color')))
		}
		else {
			local plots `plots' (`stub'`w', keep(i_iss) msymbol(`symbol') msize(*2.5) mc(`color'%40) ciopts(lwidth(*3) lcolor(`color'%40)))
		}
	}
	coefplot `plots', ///
		vertical yline(0, lw(*1.5) lc(%80) lp(dash)) legend(off) ///
		subti("`title'", size(*2)) coeflabels(i_iss=" ") ylab(-0.5(0.5)1.5, labsize(*2))
	gr save "`outfile'", replace
end

* Paper-level models (diamonds, navy).
_issplot iss1_ D navy "3 5 6 7 8" "(a) ISS vs. Non-ISS" "temp/pap1_1.gph"
_issplot pub1_ D navy ""          "(b) Public ISS"      "temp/pap1_2.gph"
_issplot pri1_ D navy "1"         "(c) Private ISS"     "temp/pap1_3.gph"

* Author-paper-level models (squares, dark orange).
_issplot iss2_ S dkorange "3 4 5 10"          "(a) ISS vs. Non-ISS" "temp/pap2_1.gph"
_issplot pub2_ S dkorange "3 4 5 6 7 8 9 10"  "(b) Public ISS"      "temp/pap2_2.gph"

* Panel (c): coefficient on i_iss from the stored estimates pri2_1 ... pri2_10
* (fitted earlier in the file). All ten models use the faded marker style.
local pale msymbol(S) msize(*2.5) mc(dkorange%40) ciopts(lwidth(*3) lcolor(dkorange%40))
local panel
forvalues w = 1/10 {
	local panel `panel' (pri2_`w', keep(i_iss) `pale')
}
coefplot `panel', ///
	vertical yline(0, lw(*1.5) lc(%80) lp(dash)) legend(off) ///
	subti("(c) Private ISS", size(*2)) coeflabels(i_iss=" ") ///
	ylab(-0.5(0.5)1.5, labsize(*2))
graph save "temp/pap2_3.gph", replace

* Combine the three saved panels of each figure set (temp/pap1_*.gph, then
* temp/pap2_*.gph) into a single row and export papcoef1.png / papcoef2.png.
forvalues s = 1/2 {
	grc1leg2 "temp/pap`s'_1.gph" "temp/pap`s'_2.gph" "temp/pap`s'_3.gph", ///
		loff row(1) xsize(20) ysize(7) xcomm ycomm
	graph export "CLEO/visuals/papcoef`s'.png", replace
}


** 3.2. Patent analysis
* Build the pool of comparison patents: every patent that is NOT in the ISS
* patent file but belongs to an inventor who has at least one patent in it.
* Output (temp/comp_patent.dta): one row per inventor_id x comp_patent_id with
* the grant year of the comparison patent.
use "temp/iss_patent_analysis.dta", clear
collapse (max) n_expedition i_private i_directpatent (min) first_year=expedition_year, by(patent_id)
merge 1:m patent_id using "CLEO/data/g_inventor_disambiguated.dta"
generate i_iss = (_merge == 3)                    // 1 = patent appears in the ISS file
bysort inventor_id: egen i_focal = max(i_iss)     // 1 = inventor has any ISS patent
keep if i_focal == 1 & i_iss == 0                 // focal inventors' non-ISS patents only
keep inventor_id patent_id
duplicates drop

* Attach grant dates; patents without a date record are discarded.
merge m:1 patent_id using "CLEO/data/patent_date.dta", keep(match) nogenerate
generate comp_patent_year = year(grant_date)
drop grant_date
rename patent_id comp_patent_id
save "temp/comp_patent.dta", replace

* For every comparison patent, compute its distance in years to the same
* inventor's ISS patents and keep the minimum signed difference
* (ISS grant year - comparison grant year). Because the comparison year is
* fixed within a group, the minimum corresponds to the inventor's earliest
* ISS patent.
*
* The difference is computed directly on the pairwise (long) data produced by
* joinby. The earlier reshape wide -> forval 1/334 -> reshape long round-trip
* depended on the hard-coded maximum of 334 comparison patents per
* inventor-patent pair and would error (or leave gaps) whenever the data changed.
use "temp/iss_patent_analysis.dta", clear
g patent_year = year(grant_date)
keep patent_id patent_year
duplicates drop
merge 1:m patent_id using "CLEO/data/g_inventor_disambiguated.dta", keepusing(inventor_id)
keep if _merge == 3
drop _merge

* All (ISS patent, comparison patent) pairs that share an inventor.
joinby inventor_id using "temp/comp_patent.dta"
drop if patent_id == comp_patent_id
drop if mi(comp_patent_id)
g year_diff = patent_year - comp_patent_year if !mi(comp_patent_year)

collapse (min) year_diff, by(inventor_id comp_patent_id comp_patent_year)
rename comp_* *
label var year_diff "Difference in year between comparison patent and ISS patents"
save "temp/comp_patent2.dta", replace

* Stack ISS patents (i_iss = 1) and comparison patents (i_iss = 0) at the
* inventor-patent level, then attach forward citations and inventor counts.
use "temp/iss_patent_analysis.dta", clear
keep if year(grant_date) >= expedition_year       // granted in or after the expedition year
collapse (max) n_expedition i_private i_directpatent (min) first_expyear=expedition_year, by(patent_id)
generate i_iss = 1
duplicates drop
merge 1:m patent_id using "CLEO/data/g_inventor_disambiguated.dta", keepusing(inventor_id) keep(match) nogenerate

append using "temp/comp_patent2.dta"
replace i_iss = 0 if missing(i_iss)               // appended comparison rows

* Citation pairs; patents without any citing record are kept (unmatched master).
joinby patent_id using "CLEO/data/patent_patent_citation.dta", unmatched(master)
drop _merge
drop patent_year patent_grant_year

* Grant year is re-derived from the patent date file for every row.
merge m:1 patent_id using "CLEO/data/patent_date.dta", keep(match) nogenerate
generate patent_year = year(grant_date)

* n_patci = number of distinct citing patents per patent;
* patent_cite_y = that count divided by (2023 - grant year).
egen uno = tag(patent_id citing_patent_id)
bysort patent_id: egen n_patci = total(uno)
generate patent_cite_y = n_patci / (2023 - patent_year)
drop citing_patent_id citing_patent_year uno
duplicates drop
drop grant_date

merge m:1 patent_id using "CLEO/data/all_inventorcountwide.dta", keep(master match) nogenerate

* Side dataset (data/temp/patentroot.dta), one row per patent in memory:
*   breath       = row total of the cpc_* columns from g_cpc_current.dta
*   string_cpc   = concatenation of those columns (class-profile key)
*   subject_freq = number of patents sharing the same string_cpc
* The working data are restored afterwards.
preserve
keep patent_id
duplicates drop
destring patent_id, replace                       // numeric id for the CPC merge
merge 1:1 patent_id using "CLEO/data/g_cpc_current.dta", keep(match) nogenerate
rename iid* cpc_*
foreach flag of varlist cpc_* {
	replace `flag' = 0 if missing(`flag')
}
egen breath = rowtotal(cpc_*)
tostring cpc_*, replace
egen string_cpc = concat(cpc*)
drop cpc_*
tostring patent_id, replace                       // back to string for the date merge
merge 1:1 patent_id using "CLEO/data/patent_date.dta", keepusing(grant_date) keep(match) nogenerate
generate patent_year = year(grant_date)
drop grant_date
sort patent_year
generate uno = 1
bysort string_cpc: egen subject_freq = total(uno)
drop uno
label variable breath "Number of CPC classes"
label variable subject_freq "Class frequency"
save "data/temp/patentroot.dta", replace
restore

* Assemble the inventor-patent working file (data/temp/working_patent.dta).
* Rows with no year_diff (the ISS patents, which do not come from
* comp_patent2.dta) are assigned year_diff = 0.
replace year_diff=0 if mi(year_diff)
joinby patent_id using "data/temp/patentroot.dta", unm(master)
drop _merge string_cpc 
duplicates drop

* Inventor-level sample flags: does the inventor have any public / private /
* direct ISS patent?
g i_public = (i_private==0 & i_iss==1)
bys inventor_id: egen i_publicsample = max(i_public)
bys inventor_id: egen i_privatesample = max(i_private)
bys inventor_id: egen i_directsample = max(i_directpatent)

* Zero-fill the patent-level indicators on comparison rows. Each indicator is
* guarded by its own sample flag (the i_directpatent line previously tested
* i_privatesample, a copy-paste slip).
replace i_public=0 if mi(i_public) & !mi(i_publicsample)
replace i_private=0 if mi(i_private) & !mi(i_privatesample)
replace i_directpatent=0 if mi(i_directpatent) & !mi(i_directsample)

* One row per inventor-patent; year_diff is the patent-level minimum.
bys patent_id: egen yff = min(year_diff)
collapse n_expedition i_public i_private i_directpatent first_expyear i_iss patent_year n_patci patent_cite_y n_inventor breath subject_freq i_publicsample i_privatesample i_directsample (min) yff, by(inventor_id patent_id)
rename yff year_diff
label var n_expedition "Number of flights"
label var first_expyear "First flight year"
label var patent_year "Patent grant year"
label var breath "Number of unique patent classes"
label var i_private "Private developer or PI indicator"
label var i_public "Public developer or PI indicator"
label var i_iss "ISS patent indicator"
label var i_directpatent "Direct patent indicator"
label var subject_freq "Popularity of patent class"
label var n_inventor "Number of inventors"
keep if inrange(patent_year, 1992, 2021)

* Sample counts (figures in the comments are those recorded by the author).
codebook patent_id inventor_id if i_iss==1
	// 98 ISS patents by 145 inventors
codebook patent_id inventor_id if i_directpatent==1
	// 41 patents by 66 inventors
codebook patent_id inventor_id if i_directpatent==0 & i_iss==1
	// 57 patents by 79 inventors
codebook patent_id if i_iss==0
	// 1,663 patents by the same set of inventors on earth
save "data/temp/working_patent.dta", replace  

* iss patents vs others
* Sample: patents within +/-5 years (year_diff) of the inventor's ISS patent.
* a1-a3: patent-level models (year FE, robust SE).
* b1-b3: inventor-patent models (year + inventor FE, SE clustered by inventor).
* The "2" (IHS-OLS) and "3" (PPML on the yearly rate) models are written to
* data/temp/patent1.dta, which is created here (replace) and appended to below.
u "data/temp/working_patent.dta", clear
keep if inrange(year_diff, -5, 5)
est clear 
foreach var of varlist patent_cite_y n_patci {
	// inverse hyperbolic sine: log(x + sqrt(x^2 + 1))
	g l_`var' = log(`var' + sqrt(`var'^2+1))
}	
preserve 
* Patent-level data: keep only patent-level variables so that duplicates drop
* leaves one row per patent. The inventor-level flag i_directsample was
* previously kept here, which could leave the same patent more than once and
* was inconsistent with the public/private blocks and the Figure 9 loop.
keep *patent_cite_y *n_patci i_iss n_inventor breath subject_freq patent_year patent_id
duplicates drop 
eststo a2: reghdfe l_n_patci i_iss n_inventor breath subject_freq, a(patent_year ) vce(robust)
regsave using "data/temp/patent1.dta", replace ci p level(95) addlabel(subsample, a2, data, ISS)
eststo a3: ppmlhdfe patent_cite_y i_iss n_inventor breath subject_freq, a(patent_year ) vce(robust)
regsave using "data/temp/patent1.dta", append ci p level(95) addlabel(subsample, a3, data, ISS)
eststo a1: ppmlhdfe n_patci i_iss n_inventor breath subject_freq, a(patent_year ) vce(robust)
restore 

eststo b2: reghdfe l_n_patci i_iss n_inventor breath subject_freq, a(patent_year inventor_id) clu(inventor_id)
regsave using "data/temp/patent1.dta", append ci p level(95) addlabel(subsample, b2, data, ISS)
eststo b3: ppmlhdfe patent_cite_y i_iss n_inventor breath subject_freq, a(patent_year inventor_id) clu(inventor_id)
regsave using "data/temp/patent1.dta", append ci p level(95) addlabel(subsample, b3, data, ISS)
eststo b1: ppmlhdfe n_patci i_iss n_inventor breath subject_freq, a(patent_year  inventor_id) clu(inventor_id)
	
esttab a1 a2 a3 b1 b2 b3 , star(* .10 ** .05 *** .01) varwidth(45) label se ///
	stats(N r2_p r2 p, labels("N" "Pseudo R2" "R2" "Joint test p-value") fmt(%20.0f %20.2f %20.2f)) ///
	keep(i_iss n_inventor breath subject_freq) order(i_iss n_inventor breath subject_freq) 

estfe a* b*
return list 

* public patents vs others
* Same specifications as the ISS block, restricted to inventors flagged
* i_publicsample == 1. c1-c3: patent-level; d1-d3: inventor-patent level.
use "data/temp/working_patent.dta", clear
keep if inrange(year_diff, -5, 5) & i_publicsample == 1
foreach y in patent_cite_y n_patci {
	generate l_`y' = log(`y' + sqrt(`y'^2 + 1))
}
local rhs i_iss n_inventor breath subject_freq

* Patent-level models: year FE, robust SE.
preserve
keep *patent_cite_y *n_patci `rhs' patent_year patent_id
duplicates drop
eststo c2: reghdfe l_n_patci `rhs', absorb(patent_year) vce(robust)
regsave using "data/temp/patent1.dta", append ci p level(95) addlabel(subsample, c2, data, Public)
eststo c3: ppmlhdfe patent_cite_y `rhs', absorb(patent_year) vce(robust)
regsave using "data/temp/patent1.dta", append ci p level(95) addlabel(subsample, c3, data, Public)
eststo c1: ppmlhdfe n_patci `rhs', absorb(patent_year) vce(robust)
restore

* Inventor-patent models: year + inventor FE, SE clustered by inventor.
eststo d2: reghdfe l_n_patci `rhs', absorb(patent_year inventor_id) vce(cluster inventor_id)
regsave using "data/temp/patent1.dta", append ci p level(95) addlabel(subsample, d2, data, Public)
eststo d3: ppmlhdfe patent_cite_y `rhs', absorb(patent_year inventor_id) vce(cluster inventor_id)
regsave using "data/temp/patent1.dta", append ci p level(95) addlabel(subsample, d3, data, Public)
eststo d1: ppmlhdfe n_patci `rhs', absorb(patent_year inventor_id) vce(cluster inventor_id)

esttab c1 c2 c3 d1 d2 d3, ///
	star(* .10 ** .05 *** .01) varwidth(45) label se ///
	stats(N r2_p r2 p, labels("N" "Pseudo R2" "R2" "Joint test p-value") fmt(%20.0f %20.2f %20.2f)) ///
	keep(`rhs') order(`rhs')

estfe c* d*
return list

* private patents vs others
* Same specifications, restricted to inventors flagged i_privatesample == 1.
* e1-e3: patent-level; f1-f3: inventor-patent level. The closing esttab prints
* the six PPML count models (a1 c1 e1 b1 d1 f1) side by side.
use "data/temp/working_patent.dta", clear
keep if inrange(year_diff, -5, 5) & i_privatesample == 1
foreach y in patent_cite_y n_patci {
	generate l_`y' = log(`y' + sqrt(`y'^2 + 1))
}
count if i_private==1 & n_patci>0
local rhs i_iss n_inventor breath subject_freq

* Patent-level models: year FE, robust SE.
preserve
keep *patent_cite_y *n_patci `rhs' patent_year patent_id
duplicates drop
eststo e2: reghdfe l_n_patci `rhs', absorb(patent_year) vce(robust)
regsave using "data/temp/patent1.dta", append ci p level(95) addlabel(subsample, e2, data, Private)
eststo e3: ppmlhdfe patent_cite_y `rhs', absorb(patent_year) vce(robust)
regsave using "data/temp/patent1.dta", append ci p level(95) addlabel(subsample, e3, data, Private)
eststo e1: ppmlhdfe n_patci `rhs', absorb(patent_year) vce(robust)
restore

* Inventor-patent models: year + inventor FE, SE clustered by inventor.
eststo f2: reghdfe l_n_patci `rhs', absorb(patent_year inventor_id) vce(cluster inventor_id)
regsave using "data/temp/patent1.dta", append ci p level(95) addlabel(subsample, f2, data, Private)
eststo f3: ppmlhdfe patent_cite_y `rhs', absorb(patent_year inventor_id) vce(cluster inventor_id)
regsave using "data/temp/patent1.dta", append ci p level(95) addlabel(subsample, f3, data, Private)
eststo f1: ppmlhdfe n_patci `rhs', absorb(patent_year inventor_id) vce(cluster inventor_id)

estfe e* f*
return list

esttab a1 c1 e1 b1 d1 f1, ///
	star(* .10 ** .05 *** .01) varwidth(45) label se ///
	stats(N r2_p r2 p, labels("N" "Pseudo R2" "R2" "Joint test p-value") fmt(%20.0f %20.2f %20.2f)) ///
	keep(`rhs') order(`rhs')

* (Extended table 5)
* LaTeX export of the six PPML citation-count models (a1 c1 e1 = patent level,
* b1 d1 f1 = inventor-patent level). The postfoot closes the single tabular
* opened in the prehead exactly once; a duplicated \end{tabular} previously
* made the generated file fail to compile.
* NOTE(review): the output file is named pubreg3.tex and the panel headers say
* "Paper level" / "Author-paper level" although these are patent models --
* confirm this is intended and does not overwrite a publication table.
esttab a1 c1 e1 b1 d1 f1  using "CLEO/visuals/pubreg3.tex", replace ///
	label se stats(N r2_p, labels("N" "Pseudo R2") fmt(%20.0f %20.2f %20.2f)) ///
	keep(i_iss n_inventor breath subject_freq) order(i_iss n_inventor breath subject_freq) ///
	indicate("Patent year FE = 0.patent_year" "Inventor FE = 0.inventor_id" ) ///
	nocons nomtitle star(* .10 ** .05 *** .01) ///
	style(tex) nogaps noisily b(%15.3gc) ///
	prehead(\begin{table}[htbp]\centering ///
	\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi} ///
	\caption{\centering Estimating the economic impact of ISS experiments using patent citations \label{reg3}} ///
	\setlength{\tabcolsep}{10pt} ///
	\renewcommand{\arraystretch}{1.1} ///
	\resizebox{\textwidth}{!}{ ///
		\begin{tabular}{l*{7}{c}} ///
		\hline\hline ///
		& \multicolumn{6}{c}{Dependent variable = $\Bar{\delta}$} ///
		\\ \cmidrule(r){2-7} ///
		& \multicolumn{3}{c}{\shortstack{Panel (A)\\Paper level models}} & \multicolumn{3}{c}{\shortstack{Panel (B)\\Author-paper level models}} ///
		\\ \cmidrule(r){2-4} \cmidrule(r){5-7} ///
		) ///
		mgroups("ISS vs. Non-ISS" "Public ISS" "Private ISS" "ISS vs. Non-ISS" "Public ISS" "Private ISS", pattern(1 1 1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span) ///
		postfoot(\hline\hline ///
		\end{tabular} ///
	} ///
	\end{table} ///
	)	
	
* alternative specifications (reg4)
* Table of the i_iss rows saved by regsave, listed in reverse order of
* estimation. The reversed index is derived from the actual number of rows
* (_N) instead of the former hard-coded 13 (= 12 saved models + 1), so it
* still runs from _N down to 1 if models are added or removed.
u "data/temp/patent1.dta", clear
keep if var=="i_iss" 
g order = _N + 1 - _n
sort order
order var ci_lower ci_upper subsample data r2 order
texsave using "CLEO/visuals/patentreg2.tex", replace

* (Main figure 9)
* For each window half-width w = 1..10 (years around the ISS patent) and each
* sample (iss = all, pub = i_publicsample, pri = i_privatesample) store two
* PPML models of n_patci:
*   <grp>1_<w>  patent level, year FE, robust SE
*   <grp>2_<w>  inventor-patent level, year + inventor FE, clustered by inventor
est clear
local rhs i_iss n_inventor breath subject_freq
forvalues w = 1/10 {
	foreach grp in iss pub pri {
		use "data/temp/working_patent.dta", clear
		if "`grp'" == "pub" {
			keep if i_publicsample==1
		}
		else if "`grp'" == "pri" {
			keep if i_privatesample==1
		}
		keep if inrange(year_diff, -`w', `w')

		// patent-level model on de-duplicated patent rows
		preserve
		keep *patent_cite_y n_patci `rhs' patent_year patent_id
		duplicates drop
		eststo `grp'1_`w': ppmlhdfe n_patci `rhs', absorb(patent_year) vce(robust)
		restore

		// inventor-patent model on the full rows
		eststo `grp'2_`w': ppmlhdfe n_patci `rhs', absorb(patent_year inventor_id) vce(cluster inventor_id)
	}
}

* Patent-level panel (a): coefficient on i_iss from iss1_1 ... iss1_10, all
* drawn with the same faded navy diamond style.
local navy msymbol(D) msize(*2.5) mc(navy%40) ciopts(lwidth(*3) lcolor(navy%40))
local panel
forvalues w = 1/10 {
	local panel `panel' (iss1_`w', keep(i_iss) `navy')
}
coefplot `panel', ///
	vertical yline(0, lw(*1.5) lc(%80) lp(dash)) legend(off) ///
	subti("(a) ISS vs. Non-ISS", size(*2)) coeflabels(i_iss=" ") ///
	ylab(-2(1)3, labsize(*2))
graph save "temp/pat1_1.gph", replace

* Patent-level panel (b): coefficient on i_iss from pub1_1 ... pub1_10, all
* drawn with the same faded navy diamond style.
local navy msymbol(D) msize(*2.5) mc(navy%40) ciopts(lwidth(*3) lcolor(navy%40))
local panel
forvalues w = 1/10 {
	local panel `panel' (pub1_`w', keep(i_iss) `navy')
}
coefplot `panel', ///
	vertical yline(0, lw(*1.5) lc(%80) lp(dash)) legend(off) ///
	subti("(b) Public ISS", size(*2)) coeflabels(i_iss=" ") ///
	ylab(-2(1)3, labsize(*2))
graph save "temp/pat1_2.gph", replace

* Patent-level panel (c): coefficient on i_iss from pri1_1 ... pri1_10, all
* drawn with the same faded navy diamond style.
local navy msymbol(D) msize(*2.5) mc(navy%40) ciopts(lwidth(*3) lcolor(navy%40))
local panel
forvalues w = 1/10 {
	local panel `panel' (pri1_`w', keep(i_iss) `navy')
}
coefplot `panel', ///
	vertical yline(0, lw(*1.5) lc(%80) lp(dash)) legend(off) ///
	subti("(c) Private ISS", size(*2)) coeflabels(i_iss=" ") ///
	ylab(-2(1)3, labsize(*2))
graph save "temp/pat1_3.gph", replace

* Inventor-patent panel (a): coefficient on i_iss from iss2_1 ... iss2_10, all
* drawn with the bold orange square style.
local bold msymbol(S) msize(*3.5) mc(dkorange) ciopts(lwidth(*4.5) lcolor(dkorange))
local panel
forvalues w = 1/10 {
	local panel `panel' (iss2_`w', keep(i_iss) `bold')
}
coefplot `panel', ///
	vertical yline(0, lw(*1.5) lc(%80) lp(dash)) legend(off) ///
	subti("(a) ISS vs. Non-ISS", size(*2)) coeflabels(i_iss=" ") ///
	ylab(-2(1)3, labsize(*2))
graph save "temp/pat2_1.gph", replace

* Inventor-patent panel (b): coefficient on i_iss from pub2_1 ... pub2_10.
* Model 3 is drawn faded, every other model bold; the choice is hard-coded
* (presumably reflecting significance -- confirm when estimates change).
local pale msymbol(S) msize(*2.5) mc(dkorange%40) ciopts(lwidth(*3) lcolor(dkorange%40))
local bold msymbol(S) msize(*3.5) mc(dkorange) ciopts(lwidth(*4.5) lcolor(dkorange))
local panel
forvalues w = 1/10 {
	local look `bold'
	if `w' == 3 local look `pale'
	local panel `panel' (pub2_`w', keep(i_iss) `look')
}
coefplot `panel', ///
	vertical yline(0, lw(*1.5) lc(%80) lp(dash)) legend(off) ///
	subti("(b) Public ISS", size(*2)) coeflabels(i_iss=" ") ///
	ylab(-2(1)3, labsize(*2))
graph save "temp/pat2_2.gph", replace

* Inventor-patent panel (c): coefficient on i_iss from pri2_1 ... pri2_10.
* Models 1-2 are drawn bold and models 3-10 faded; the split is hard-coded
* (presumably reflecting significance -- confirm when estimates change).
local pale msymbol(S) msize(*2.5) mc(dkorange%40) ciopts(lwidth(*3) lcolor(dkorange%40))
local bold msymbol(S) msize(*3.5) mc(dkorange) ciopts(lwidth(*4.5) lcolor(dkorange))
local panel
forvalues w = 1/10 {
	local look `pale'
	if `w' <= 2 local look `bold'
	local panel `panel' (pri2_`w', keep(i_iss) `look')
}
coefplot `panel', ///
	vertical yline(0, lw(*1.5) lc(%80) lp(dash)) legend(off) ///
	subti("(c) Private ISS", size(*2)) coeflabels(i_iss=" ") ///
	ylab(-2(1)3, labsize(*2))
graph save "temp/pat2_3.gph", replace

* Combine the three saved panels of each patent figure set (temp/pat1_*.gph,
* then temp/pat2_*.gph) into a single row and export patcoef1.png / patcoef2.png.
forvalues s = 1/2 {
	grc1leg2 "temp/pat`s'_1.gph" "temp/pat`s'_2.gph" "temp/pat`s'_3.gph", ///
		loff row(1) xsize(20) ysize(7) xcomm ycomm
	graph export "CLEO/visuals/patcoef`s'.png", replace
}


** -- is it driven by direct or indirect? expoloratory (Extended figure 3)
* Indirect ISS patents (i_iss == 1 and not a direct patent): PPML of n_patci
* with year + inventor FE for windows of +/-1 ... +/-10 years. Window 1
* creates data/temp/indirectrob.dta, later windows append to it.
est clear
forvalues w = 1/10 {
	use "data/temp/working_patent.dta", clear
	generate i_indirect = (i_iss==1 & i_directpatent==0)
	keep if inrange(year_diff, -`w', `w')
	ppmlhdfe n_patci i_indirect n_inventor breath subject_freq, absorb(patent_year inventor_id) vce(cluster inventor_id)
	local mode = cond(`w' == 1, "replace", "append")
	regsave using "data/temp/indirectrob.dta", `mode' ci level(95)
}

* Keep one row per window and flag coefficients whose 95% CI excludes zero.
* model_id is the running model count reversed as 11 - count, so the first
* saved window gets 10 and the last gets 1.
* NOTE(review): the figure labels this axis "Year" with 1..10 -- confirm the
* reversal is intended.
use "data/temp/indirectrob.dta", clear
generate model_id = sum(var == "i_indirect")
keep if var == "i_indirect"
replace model_id = 11 - model_id
generate significant_coef = (ci_lower>0) | (ci_upper<0)
generate model_id1 = model_id - 0.1               // horizontal offsets for plotting
generate model_id2 = model_id + 0.1
sort var model_id
save "data/temp/indirectrob.dta", replace

* Direct ISS patents (i_directpatent): PPML of n_patci with year + inventor FE
* for windows of +/-1 ... +/-10 years. Window 1 creates
* data/temp/directrob.dta, later windows append to it.
forvalues w = 1/10 {
	use "data/temp/working_patent.dta", clear
	keep if inrange(year_diff, -`w', `w')
	ppmlhdfe n_patci i_directpatent n_inventor breath subject_freq, absorb(patent_year inventor_id) vce(cluster inventor_id)
	local mode = cond(`w' == 1, "replace", "append")
	regsave using "data/temp/directrob.dta", `mode' ci level(95)
}

* Keep one row per window and flag coefficients whose 95% CI excludes zero.
* model_id is the running model count reversed as 11 - count, so the first
* saved window gets 10 and the last gets 1.
* NOTE(review): the figure labels this axis "Year" with 1..10 -- confirm the
* reversal is intended.
use "data/temp/directrob.dta", clear
generate model_id = sum(var == "i_directpatent")
keep if var == "i_directpatent"
replace model_id = 11 - model_id
generate significant_coef = (ci_lower>0) | (ci_upper<0)
generate model_id1 = model_id - 0.1               // horizontal offsets for plotting
generate model_id2 = model_id + 0.1
sort var model_id
save "data/temp/directrob.dta", replace

* Plot direct vs. indirect coefficients by model_id. Coefficients whose CI
* excludes zero are coloured (orange circles = direct, navy diamonds =
* indirect); the rest are grey. Indirect points use model_id1 (shifted by -0.1).
use "data/temp/indirectrob.dta", clear
append using "data/temp/directrob.dta"

twoway ///
	(scatter coef model_id if var=="i_directpatent" & significant_coef==1, ///
		msymbol(O) msize(*2) mc(dkorange)) ///
	(scatter coef model_id1 if var=="i_indirect" & significant_coef==1, ///
		msymbol(D) msize(*2) mc(navy)) ///
	(scatter coef model_id if var=="i_directpatent" & significant_coef==0, ///
		msymbol(O) msize(*2) mc(gray%75)) ///
	(scatter coef model_id1 if var=="i_indirect" & significant_coef==0, ///
		msymbol(D) msize(*1.5) mc(gray%75)), ///
	legend(order(1 "Direct ISS patents" 2 "Patents citing ISS papers") size(*1.8) pos(9) col(1)) ///
	xlab(1(1)10, labsize(*2) grid) ylab(-0.5(0.5)1, labsize(*2)) ymtick(-0.5(0.25)1, grid) ///
	yline(0, lw(*1.5) lp(dash)) xsize(20) ysize(12) ///
	xti("Year", size(*1.5)) yti("Estimated coefficient", size(*1.5))
graph export "CLEO/visuals/patcoef3.png", replace


	
********** Summary table (Extended table 2)
* Publication panel: descriptive statistics on one row per publication (pmid)
* within the +/-5 year window. Written to CLEO/visuals/sumstats.tex (replace);
* the patent panel is appended to the same file afterwards.
* Counts in the comments are those recorded by the author.
u "data/temp/working_publication2.dta", clear
keep if inrange(year_diff, -5, 5)
codebook journal
	// 408 journals
codebook pmid
	// 1,312 publications 
codebook pmid if i_iss == 1  
	// 212 publications from ISS experiments
codebook pmid if i_private == 1
	// 74 publications by private developer or PI
codebook author_id
	// 694 authors
sum year
	// 1997-2021
	
keep pmid n_citation citation_rate_y n_author breath subject_freq
duplicates drop 

* These labels are printed in the LaTeX table ("anuual" typo corrected).
label var n_citation "Total number of paper citations"
label var citation_rate_y "Average annual paper citation rate"

est clear
eststo tabsum: estpost sum n_citation citation_rate_y n_author breath subject_freq, detail

esttab using "CLEO/visuals/sumstats.tex", replace ///
	refcat(n_citation "\textbf{A. Papers and Authors Sample (N=2,674)}", nolabel) ///
	collabels(\multicolumn{1}{c}{{Mean}} \multicolumn{1}{c}{{Median}} \multicolumn{1}{c}{{Min}} \multicolumn{1}{c}{{Max}} \multicolumn{1}{c}{{Std.Dev.}}) ///
	cells("mean(fmt(3)) p50(fmt(0)) min(fmt(0)) max(fmt(0)) sd(fmt(3))") label nonumber noobs alignment(S) booktabs f nomtitle plain

* Patent panel of the summary table, appended to CLEO/visuals/sumstats.tex.
* Counts in the comments are those recorded by the author.
u "data/temp/working_patent.dta", clear
keep if inrange(year_diff, -5, 5)
codebook patent_id 
	// 883 patents
codebook patent_id if i_iss==1
	// 98 patents related to ISS
codebook patent_id if i_directpatent==1
	// 41 directly reported to ISS 
codebook patent_id if i_private==1
	// 21 from private experiments
codebook inventor_id
	// 145 inventors
sum patent_year
	// 2006-2021
	
* Keep patent_id so that duplicates drop leaves one row per patent, mirroring
* the publication panel (which keeps pmid). Without the id, distinct patents
* with identical statistics were merged into a single observation.
keep patent_id n_patci patent_cite_y n_inventor breath subject_freq
duplicates drop

* These labels are printed in the LaTeX table ("anuual" typo corrected).
label var n_patci "Total number of patent citations"
label var patent_cite_y "Average annual patent citation rate"

eststo tabsum: estpost sum n_patci patent_cite_y n_inventor breath subject_freq, detail

esttab using "CLEO/visuals/sumstats.tex", append ///
	refcat(n_patci "\textbf{D. Patents and Inventors Sample (N=2,722)}", nolabel) ///
	cells("mean(fmt(3)) p50(fmt(0)) min(fmt(0)) max(fmt(0)) sd(fmt(3))") label nonumber noobs alignment(S) booktabs f collabels(none) nomtitle plain	

	
	
*******************************************************************************
** Datasets used in this paper
* Export the experiment / developer / PI file to Excel with variable names in
* the first row.
* NOTE(review): the input is read from the relative path "data/..." while the
* output goes to an absolute path under CLEO/data -- confirm the working
* directory at this point.
use "data/all_experiments_developer_pi.dta", clear
export excel "/Users/maxwang/Library/CloudStorage/Dropbox/Dissertation/ISS/CLEO/data/all_experiments_developer_pi.xlsx", ///
	replace firstrow(variables)



********************************************************************************
